*This code re-codes the 2016 referendum data and prepares the analysis
*The data is merged with the 2013 RCS pre/post data: 1=pre 2013; 2=post 2013; 3=pre 2016; 4=post 2016
/*
Procedure: 
The data comes in wide format. Thus:
1) We code the pre-treatment covariates for all respondents 
2) We keep only the pre-treatment covariates and the DVs 
3) We re-shape the file into long format dropping pre-treatment covariates into single variables, 
while all DVs remain 2 variables (pre post). 
4) We can then subtract pre post Dvs such that we can estimate the change in DiD designs. 
*/

* directory definitions 
* -project, doinfo- returns the project directory and this do-file's stub name
* in r(); all other paths are built relative to the project directory.

	project, doinfo
	global pdir "`r(pdir)'"							// the project's main dir.
	global dofile "`r(dofile)'"						// do-file's stub name
	global data_original = "$pdir/data_original"  //data directory for original (raw) data 
	global data_coded = "$pdir/data_coded"  //data directory for coded data 
	global figures = "$pdir/results/figures"  //data directory for figures
	global tables = "$pdir/results/tables"  //data directory for tables

	// declare the input files this do-file depends on (both are read further down):
	project, uses("$data_coded/placebased_regional.dta")
	project, original("$data_original/comune info/handcoded_brigdefile_comuneIDs.dta")

*
	// ---- 2013 RCS file: load and recode (all derived variables carry suffix _1) ----
	project, original("$data_original/election studies/ITA2013_RCSitvers2014_11_11/ITA2013_RCS(itvers2014_11_11).dta")
	use "$data_original/election studies/ITA2013_RCSitvers2014_11_11/ITA2013_RCS(itvers2014_11_11).dta", clear 

	** 0/1 indicators: a logical expression yields 1 when true and 0 otherwise
	** (rows with a missing source value therefore get 0, as before).
	gen female_1 = (Q002==2)
	gen unemployed_1 = (profes19==4)

	** age: Q004 is decoded to text, converted to a number and subtracted from 2013
	decode Q004, gen(yob_txt)
	destring yob_txt, replace
	gen age_1 = 2013 - yob_txt
	drop yob_txt

	** class and education: keep codes up to 4, everything else is missing
	gen class_1 = classe if classe<=4
	gen edu_1 = titstu if titstu<=4

	** economy item: codes up to 5 are kept, then the scale is reversed
	gen eco_general_1 = Q010 if Q010<=5
	label values eco_general_1 Q010
	revrs eco_general_1, replace 

	** 2013 vote choice, copied as is
	gen vote13_1 = Q150
	label values vote13_1 Q150

	** political interest: codes 5 and above become missing, scale reversed
	gen polinterest_1 = Q020 if Q020<5
	label values polinterest_1 Q020
	revrs polinterest_1, replace

	** talking about politics: codes 7 and above become missing (not reversed)
	gen talk_politics_1 = Q021 if Q021<7
	label values talk_politics_1 Q021

	** two items from the Q030 battery: codes up to 4 are kept, scale reversed
	gen internal_efficacy_1 = Q030_01 if Q030_01<=4
	label values internal_efficacy_1 Q030_01
	revrs internal_efficacy_1, replace

	gen pol_complex_1 = Q030_02 if Q030_02<=4
	label values pol_complex_1 Q030_02
	revrs pol_complex_1, replace

	** left-right placement: codes 11 and above become missing;
	** code 11 itself is flagged separately in lr_no_1
	gen lr_1 = Q032_01 if Q032_01<11
	gen lr_no_1 = (Q032_01==11)

	** full recoded 2013 cross-section, merged back in at the end of the do-file:
	tempfile 13_pre
	save `13_pre'

	** reduced version (ID plus four items) that is merged in before the reshape:
	keep ID eco_general_1 polinterest_1 talk_politics_1 vote13_1
	tempfile 13itans
	save `13itans'




*
	// read referendum  file:
	project, original("$data_original/election studies/ITANES_PRE-POST_Referendum_2016/ITANES_POSTREFERENDUM_2016_COS_COMPETITION.dta")
	use "$data_original/election studies/ITANES_PRE-POST_Referendum_2016/ITANES_POSTREFERENDUM_2016_COS_COMPETITION.dta", clear

	*recode of pre-treatment variables. We only use pre-treatment covariates in the final design:  

	* region: copy of s04 with its value label attached
	gen region_1=s04
	label values region_1 s04

	* province: copy of s06; missing values are then filled within region by -carryforward-.
	* NOTE(review): -bysort region_1- does not fix the order of rows inside a region,
	* so which province value is carried into a missing row depends on the current
	* sort order - confirm this is intended / reproducible.
	gen province_1=s06
	label values province_1 s06
	bysort region_1: carryforward province_1, replace

	* municipality: string copy of s07 plus a numeric id.
	* The numeric id is encoded from the raw names, i.e. before the spelling fixes that follow.
	gen comune_1=s07
	encode comune_1, gen(comune_id_1)

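	*to merge with the ISTAT IDs we need to clean the messy municipality names:
	*NOTE(review): the target "Castelfranco Castelfranco Piandiscò" below repeats "Castelfranco" - confirm it matches the spelling used in the bridge file.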
	replace comune_1="AlmÃ¨" if comune_1=="AlmÃÆÃÂš"
	replace comune_1="Bolzano/Bozen" if comune_1=="Bolzano"
	replace comune_1="CanicattÃ¬" if comune_1=="CanicattÃÆÃÂ¬"
	replace comune_1="CantÃ¹" if comune_1=="CantÃÆÃÂ¹"
	replace comune_1="Castelfranco Castelfranco Piandiscò" if comune_1=="Castelfranco di Sopra"
	replace comune_1="CittÃ  della Pieve" if comune_1=="Citta della Pieve"
	replace comune_1="Sant’Angelo in Lizzola" if comune_1=="Colbordolo"
	replace comune_1="ForlÃ¬" if comune_1=="ForlÃÆÃÂ¬"
	replace comune_1="GambolÃ²" if comune_1=="GambolÃÆÃÂ²"
	replace comune_1="Laives/Leifers" if comune_1=="Laives"
	replace comune_1="MondovÃ¬" if comune_1=="MondovÃÆÃÂ¬"
	replace comune_1="MuggiÃ²" if comune_1=="MuggiÃÆÃÂ²"
	replace comune_1="NardÃ²" if comune_1=="NardÃÆÃÂ²"
	replace comune_1="San DonÃ  di Piave" if comune_1=="San Dona di Piave"
	replace comune_1="Sant’Angelo in Lizzola" if comune_1=="Sant'Angelo in Lizzola"
	replace comune_1="ScorzÃ¨" if comune_1=="ScorzÃÆÃÂš"
	replace comune_1="SeclÃ¬" if comune_1=="SeclÃÆÃÂ¬"
	replace comune_1="Serra RiccÃ²" if comune_1=="Serra RiccÃÆÃÂ²"
	replace comune_1="Vandoies/Vintl" if comune_1=="Vandoies"

	** now we can merge it with the cleaned bridge file from ministry of interior:
	** (many respondents per comune -> m:1 on the cleaned comune name; the merge
	** result is kept in _merge_comune and tabulated for inspection)
	merge m:1 comune_1 using "$data_original/comune info/handcoded_brigdefile_comuneIDs.dta", gen(_merge_comune)
	fre _merge_comune
	** rows that exist only in the bridge file (no respondent) are dropped:
	drop if _merge_comune==2

	** some cleanings of variables we do not need:
	** NOTE(review): these are presumably columns brought in by the bridge file - confirm.
	drop dup dup2 flags flagcodes value selezionaperiodo time statocivile statciv2 etã sesso sexistat1 tipodiindicatoredemografico tipo_dato15

	** Covariates from the 2016 pre-referendum wave (suffix _3) and outcomes for
	** the pre (_3) and post (_4) wave.

	gen female_3=0
	replace female_3=1 if s01==2 

	gen age_3=2016-s03

	* religious_3 is 0 when s09==7 and 1 for every other observed answer.
	* A missing s09 has to stay missing: in Stata the comparison s09!=7 is true
	* for missing values, so without the !missing() guard non-respondents would
	* silently be coded 1.
	gen religious_3=0 if s09==7
	replace religious_3=1 if s09!=7 & !missing(s09)

	* education: codes 7-9 are collapsed into 6 and codes 10-11 into 7
	gen edu_3=s10
	label values edu_3 s10
	recode edu_3 (7 8 9 = 6) (10 11 = 7)

	gen unemployed_3=0
	replace unemployed_3=1 if s11==2

	gen vote13_2=S15
	label values vote13_2 S15

	* Pre-wave vote intention. Each indicator starts at 0, is set to missing when
	* S27==2 and to 1 for the matching S28 answer (1 = yes, 2 = no in this coding).
	* The replace order matters: the last matching line wins.
	gen referendum_yes_3=0
	replace referendum_yes_3=. if S27==2
	replace referendum_yes_3=1 if S28==1 

	gen referendum_no_3=0
	replace referendum_no_3=. if S27==2
	replace referendum_no_3=1 if S28==2 

	* "wunsure" versions additionally count the S29 answer (1 = yes, 2 = no)
	gen referendum_yes_wunsure_3=0
	replace referendum_yes_wunsure_3=. if S27==2
	replace referendum_yes_wunsure_3=1 if S28==1
	replace referendum_yes_wunsure_3=1 if S29==1 
	
	gen referendum_no_wunsure_3=0
	replace referendum_no_wunsure_3=. if S27==2
	replace referendum_no_wunsure_3=1 if S28==2 
	replace referendum_no_wunsure_3=1 if S29==2 
	
	gen referendum_unsure_3=0
	replace referendum_unsure_3=. if S27==2
	replace referendum_unsure_3=1 if S28==3 | S27==3
	lab var referendum_unsure_3 "unsure in pre-wave"

	* Post-wave reported vote, taken from D28_W9.
	* NOTE(review): here 2 is coded as yes and 1 as no, the reverse of the
	* pre-wave S28 coding above - confirm against the questionnaire.
	* NOTE(review): rows with missing D28_W9 end up as 0 on the vote indicators
	* and as 1 on turnout_4 - confirm this is intended.
	gen referendum_yes_4=0
	replace referendum_yes_4=1 if D28_W9==2
	
	gen referendum_no_4=0
	replace referendum_no_4=1 if D28_W9==1
	
	* in the post wave the "wunsure" versions are coded exactly like the plain ones
	gen referendum_yes_wunsure_4=0
	replace referendum_yes_wunsure_4=1 if D28_W9==2
	
	gen referendum_no_wunsure_4=0
	replace referendum_no_wunsure_4=1 if D28_W9==1
	
	* turnout: pre wave is 1 only if S27==1; post wave is 1 unless D28_W9==4
	gen turnout_3=0
	replace turnout_3=1 if S27==1
	
	gen turnout_4=1
	replace turnout_4=0 if D28_W9==4
	
	* general economy item, pre and post: codes 6 and above become missing,
	* then the scale is reversed
	gen eco_general_3=S18
	replace eco_general_3=. if eco_general_3>=6
	label values eco_general_3 S18
	revrs eco_general_3, replace 
	
	gen eco_general_4=D1_W9
	replace eco_general_4=. if eco_general_4>=6
	label values eco_general_4 D1_W9
	revrs eco_general_4, replace 
	
	** Post-treatment items (suffix _4) that can serve as "manipulation tests".

	* talking about politics: codes 6 and above become missing (scale not reversed)
	gen talk_politics_4 = D16_W9 if D16_W9 < 6
	label values talk_politics_4 D16_W9

	* interest in the referendum: only code 5 becomes missing, scale reversed
	gen interest_ref_4 = D17_W9 if D17_W9 != 5
	label values interest_ref_4 D17_W9
	revrs interest_ref_4, replace 

	* The contact_* and web_* items all follow one recipe: copy the source item,
	* set codes 5 and above to missing, attach the source's value label and
	* reverse the scale. The two lists below are parallel (same position = same
	* item) and keep the original creation order of the variables.
	local mc_new ///
		contact_tv_4 contact_paper_4 contact_radio_4 contact_personal_4 ///
		contact_politican_4 contact_web_4 ///
		web_politican_4 web_socmedia_4 web_video_4 web_share_4 ///
		web_discuss_4 web_event_4
	local mc_src ///
		D18_01_W9 D18_03_W9 D18_05_W9 D18_02_W9 ///
		D18_06_W9 D18_07_W9 ///
		D21_01_W9 D21_02_W9 D21_03_W9 D21_04_W9 ///
		D21_05_W9 D21_06_W9
	local mc_k : word count `mc_new'
	forvalues i = 1/`mc_k' {
		local new : word `i' of `mc_new'
		local src : word `i' of `mc_src'
		gen `new' = `src' if `src' < 5
		label values `new' `src'
		revrs `new', replace
	}
	
	** Wide -> long: one row per respondent (id) and wave.
	gen id=_n
	gen date=SERIE
	order id ID date

	** respondents without a panel ID are kept; they receive an artificial ID
	** built from the running row number so that ID is never missing:
	replace ID=999999999+id if ID==.

	** add the reduced 2013 items; rows found only in the 2013 file are not kept
	merge 1:1 ID using "`13itans'", keep(master match) nogenerate

	reshape long vote08_ idate_ talk_politics_ turnout_ ///
		referendum_yes_ referendum_yes_wunsure_ referendum_no_ referendum_no_wunsure_ ///
		eco_general_ reform_general_, i(id) j(wave)

	** the reshaped variables still end in an underscore; strip that last character:
	foreach stub of varlist *_ {
		local bare = substr("`stub'", 1, length("`stub'") - 1)
		rename `stub' `bare'
	}

	** variables that stayed wide with suffix _4 are post-wave only:
	** blank them in every other wave
	foreach postvar of varlist *_4 {
		replace `postvar'=. if wave!=4
	}

	** re-attach value labels:
	label values vote08 Q151
	label values idate DATA
	label values talk_politics Q021
	label values vote13_1 Q150

	** declare the panel structure and put identifiers first:
	xtset id wave

	order *_1 *_4 
	order id ID wave date idate region_1 province_1 comune_1 comune_id_1

	save "$data_coded/placebased_individual.dta", replace 




*
	// ---- M5S meetup/event data: import, clean, hold in a temporary file ----
	project, original("$data_coded/events_comune_neigh_nga.csv")
	import delimited "$data_coded/events_comune_neigh_nga.csv", clear 

	* count variables: a missing value is treated as zero events.
	* (The variables are already numeric after the import, so no destring is required.)
	foreach cnt of varlist n_* wn_* hist_* {
		replace `cnt'=0 if `cnt'==.
	}
	
	* standardized exposure (see paper, p.XXX): ln(1 + count / ln(population))
	foreach cnt of varlist n_total-hist_wn_late {
		gen std_`cnt' = ln(1 + `cnt'/ln(comune_population))
	}
	
	tempfile meetup
	save `meetup'




*
	// import panel data and merge with M5S infos:
	// (re-opens the long file saved above)
	use "$data_coded/placebased_individual.dta", clear
	
	// NOTE(review): the source variable name looks encoding-damaged; presumably
	// it is the comune code that came in with the bridge file - confirm.
	rename ïitter107 comune_id
	destring comune_id, replace
	
	// many panel rows per comune -> m:1; comuni that appear only in the meetup
	// file (no respondents) are dropped
	merge m:1 comune_id using "`meetup'", gen(_merge_meetup)
	drop if _merge_meetup==2
		
*
	// cleanings/codings: 

	* encoded comune id 1 is treated as "no comune" and set to missing
	replace comune_id_1=. if comune_id_1==1 
	*
	* never exposed: no M5S events at all in the comune (missing if no event data)
	gen m5s_no=0
	replace m5s_no=1 if n_total==0
	replace m5s_no=. if n_total==.
	*
	* ever treated: at least one event in the comune (missing if no event data)
	gen ever_treated=0
	replace ever_treated=1 if n_total>=1
	replace ever_treated=. if n_total==.
	*
	* pre/post referendum indicator, defined for waves 3 and 4 only
	gen referendum=0 if wave==3
	replace referendum=1 if wave==4
	*
	* flag for respondents without a usable comune id
	gen no_comune=0
	replace no_comune=1 if comune_id_1==1 | comune_id_1==.
	*
	* 2013 vote dummies from vote13_1 (missing when the vote is missing)
	gen m5s_1=0
	replace m5s_1=1 if vote13_1==4
	replace m5s_1=. if vote13_1==.
	*
	gen pd_1=0
	replace pd_1=1 if vote13_1==2
	replace pd_1=. if vote13_1==.
	*
	gen pdl_1=0
	replace pdl_1=1 if vote13_1==1
	replace pdl_1=. if vote13_1==.
	*
	* categorical version of the 2013 vote, 0 collects all remaining choices
	gen vote_main_1=0
	replace vote_main_1=1 if vote13_1==2 
	replace vote_main_1=2 if vote13_1==4 
	replace vote_main_1=3 if vote13_1==1 
	replace vote_main_1=4 if vote13_1==5
	replace vote_main_1=. if vote13_1==. 
	label define main_vote 1 "PD" 2 "M5S" 3 "PDL" 4 "Monti"  0 "other"
	label values vote_main_1 main_vote
	*
	* undecided: respondent-level constant, 1 if reform_general==6 in wave 3.
	* The helper is defined only in wave 3 and only where reform_general is
	* observed. Rows from other waves are missing (not 0), so max() returns
	* missing for respondents whose wave-3 answer is missing instead of a
	* spurious 0.
	gen h1=(reform_general==6) if wave==3 & !missing(reform_general)
	bysort id: egen undecided=max(h1)
	drop h1
	*
	* wave-3 value of reform_general spread to all rows of the respondent
	gen h1=reform_general if wave==3
	bysort id: egen reform_general_3=max(h1)
	drop h1

	* Time-varying treatments: in wave 3 a variable takes the "pre" count, in
	* wave 4 the "treat" count, and it is missing in all other waves.
	* The creation order is kept as is because a variable range
	* (first-last) is used on these variables further down.

	gen wave_n_treat_157 = cond(wave==3, n_pre_157d, n_treat_157d) if inlist(wave, 3, 4)
	
	gen wave_wn_treat_157d = cond(wave==3, wn_pre_157d, wn_treat_157d) if inlist(wave, 3, 4)
	
	gen wave_neigh_wn_treat_157d = cond(wave==3, wn_neigh_pre_157d, wn_neigh_treat_157d) if inlist(wave, 3, 4)
	
	* shorter windows, unweighted counts
	foreach days in 120 90 60 30 {
		gen wave_n_treat_`days' = cond(wave==3, n_pre_`days'd, n_treat_`days'd) if inlist(wave, 3, 4)
	}
	
	* 157-day window split by indoor / outdoor location
	gen wave_wn_treat_157d_in = cond(wave==3, wn_pre_loc_indoor_157d, wn_treat_loc_indoor_157d) if inlist(wave, 3, 4)
	
	gen wave_wn_treat_157d_out = cond(wave==3, wn_pre_loc_outdoor_157d, wn_treat_loc_outdoor_157d) if inlist(wave, 3, 4)
	
	** Standardizing exposure as discussed in the paper needs exposure_pop,
	** which is available after merging the regional file.
	drop _merge*
	merge m:1 comune_id using "$data_coded/placebased_regional.dta", force keep(master match) nogenerate

	* population-standardized exposure: ln(1 + count / exposure_pop)
	foreach treat of varlist wave_n_treat_157-wave_wn_treat_157d_out {
		gen std_`treat' = ln(1 + `treat'/exposure_pop)
	}

	* some cleanings of identifiers:
	* the existing province_id / region_id are replaced by the survey-based ones
	drop province_id region_id 
	gen province_id=province_1
	gen region_id=region_1
	
	* lagged turnout (previous wave of the same respondent, using the xtset panel)
	xtset
	gen lturnout=l.turnout
	
	* mobilised: did not intend to vote in the previous wave but voted in wave 4;
	* only defined for respondents with lagged turnout equal to 0
	gen turnout_noyes=0 if lturnout==0 
	replace turnout_noyes=1 if (turnout==1 & wave==4) & lturnout==0
	
	* post-referendum dummy (same definition as -referendum-)
	gen post=0 if wave==3
	replace post=1 if wave==4
	
	rename n_total m5s_total
	
	rename std_wave_wn_treat_157d m5s_wn_wave_157
	lab var m5s_wn_wave_157 "M5S: campaign"
	
	rename std_wave_n_treat_157 m5s_n_wave_157
	lab var m5s_n_wave_157 "M5S: campaign"
	
	lab var m5s_no "M5S: never exposed"
	
	* binary exposure in the current wave (missing where exposure is missing)
	gen m5s_ref_yn=0
	replace m5s_ref_yn=1 if m5s_n_wave_157>0
	replace m5s_ref_yn=. if m5s_n_wave_157==.
	*
	* comune ever exposed: maximum exposure within the comune is positive.
	* A version of m5s_ref_ever may already be in memory from an earlier merge;
	* -capture drop- removes it when present and does not abort the do-file
	* when it is absent.
	bysort comune_id: egen h1=max(m5s_n_wave_157)
	capture drop m5s_ref_ever
	gen m5s_ref_ever=0
	replace m5s_ref_ever=1 if h1>0
	replace m5s_ref_ever=. if m5s_n_wave_157==.
	drop h1 
	
	* same two indicators for exposure in neighbouring comuni
	gen m5s_ref_neig_yn=0
	replace m5s_ref_neig_yn=1 if std_wave_neigh_wn_treat_157d>0
	replace m5s_ref_neig_yn=. if std_wave_neigh_wn_treat_157d==.
	*
	bysort comune_id: egen h1=max(std_wave_neigh_wn_treat_157d)
	gen m5s_ref_neigh_ever=0
	replace m5s_ref_neigh_ever=1 if h1>0
	replace m5s_ref_neigh_ever=. if std_wave_neigh_wn_treat_157d==.
	drop h1 
	
	
	* add the full set of recoded 2013 covariates as a cross-section
	* (many panel rows per respondent -> m:1); rows found only in the 2013 file
	* are dropped
	merge m:1 ID using "`13_pre'", force
	drop if _merge==2
	drop _merge
	
	* variable labels used in tables and figures
	* (fix: the religiosity label contained a stray tab inside the parentheses)
	lab var unemployed_3 "unemployed (0,1)"
	lab var unemployed_1 "unemployed (0,1)"
	lab var female_3 "female (0,1)"
	lab var female_1 "female (0,1)"
	lab var age_3 "age (18-88)"
	lab var age_1 "age (18-89)"
	lab var edu_3 "education (1-7)"
	lab var edu_1 "education (1-7)"
	lab var religious_3 "religiosity (0,1)"
	lab var std_wave_n_treat_90 "M5S: 3 months before referendum"
	lab var std_wave_n_treat_60 "M5S: 2 months before referendum"
	lab var std_wave_n_treat_30 "M5S: 1 month before referendum"
	lab var polinterest_1 "political interest (1-4)"
	lab var m5s_1 "M5S voter in 2013 (0,1)"
	lab var pd_1 "PD voter in 2013 (0,1)"
	lab var pdl_1 "PdL voter in 2013 (0,1)"
	lab var lr_1 "left-right (1-10)"
	* these two replace the "M5S: campaign" labels assigned earlier
	lab var m5s_n_wave_157 "M5S: referendum {it:(events only)}"
	lab var m5s_wn_wave_157 "M5S: referendum"
	lab var m5s_ref_yn "M5S: binary"
	lab var eco_general_1 "economy retrospective (1-5)"
	lab var lr_no_1 "explicitly no lr placement (0,1)"
	lab var talk_politics_1 "talk politics (1-6)"
	lab var pol_complex_1 "politics too complex (1-4)"
	lab var internal_efficacy_1 "internal efficacy (1-4)"
	lab var referendum_no "vote no (0,1)"
	lab var referendum_no_wunsure "vote no (0,1)"
	lab var referendum_yes "vote yes (0,1)"
	lab var referendum_yes_wunsure "vote yes (0,1)"
	lab var std_wave_wn_treat_157d_in "M5S indoor"
	lab var std_wave_wn_treat_157d_out "M5S outdoor"
	* Mean imputation for age and education: <var>_mean holds the overall mean,
	* <var>_imp equals the observed value, or the mean where the value is missing (.)
	foreach cov of varlist age_3 edu_3 {
		egen `cov'_mean = mean(`cov')
		gen `cov'_imp = cond(`cov'==., `cov'_mean, `cov')
	}
	
	* write the final individual-level file and register it as an output of this do-file
	save "$data_coded/placebased_individual.dta", replace 

	*codebookout "$data_coded/codebookplacebased_individual"

	project, creates("$data_coded/placebased_individual.dta")
